rm(list = ls())
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.3.2 ✔ purrr 0.3.3
## ✔ tibble 3.0.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.6.2
## Warning: package 'tibble' was built under R version 3.6.2
## ── Conflicts ────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(readxl)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
Q1
Daily test counts by Parish
Daily_counts=read_excel("daily.xlsx")
Daily_counts=tibble(Daily_counts)
ggplot(data = Daily_counts) +
geom_point(mapping = aes(x = Lab_Collection_Date, y = Daily_Test_Count)) +
facet_wrap(~Parish, nrow = 8) +
labs(title = "Daily test counts by Parish")

Daily Negative test counts by Parish
ggplot(data = Daily_counts) +
geom_point(mapping = aes(x = Lab_Collection_Date, y = Daily_Negative_Test_Count)) +
facet_wrap(~Parish, nrow = 8) +
labs(title = "Daily Negative test counts by Parish")

Daily Positive test counts by Parish
ggplot(data = Daily_counts) +
geom_point(mapping = aes(x = Lab_Collection_Date, y = Daily_Positive_Test_Count)) +
facet_wrap(~Parish, nrow = 8) +
labs(title = "Daily Positive test counts by Parish")

Weekly total case count by Age group
Age_group=read.csv("age.csv")
ggplot(data = Age_group, mapping = aes(x = Week, y = Weekly_Case_Count)) +
geom_point(mapping = aes(color = Age_Range)) +
labs(title = "Weekly total case count by Age group", position = "jitter")

Weekly case count by Age group and Region
Age_region=read.csv("Age_region.csv")
ggplot(data = Age_region) +
geom_point(mapping = aes(x = Week, y = Weekly_Case_Count, color = Location)) +
facet_wrap(~ Age_Range, nrow = 3) +
labs(title = "Weekly case count by Age group and Region", position = "jitter")

Weekly total case count by Gender
gender=read.csv("gender.csv")
ggplot(data = gender, mapping = aes(x = Week, y = Weekly_Case_Count)) +
geom_point(mapping = aes(color = Gender)) +
labs(title = "Weekly total case count by Gender", position = "jitter")

Weekly case count by Gender and Region
gender_region=read.csv("gender_region.csv")
ggplot(data = gender_region) +
geom_point(mapping = aes(x = Week, y = Weekly_Case_Count, color = Gender)) +
facet_wrap(~ Location, nrow = 4)+
labs(title = "Weekly case count by Gender and Region")

Weekly case count by Gender and Parish
gender_parish=read.csv("gender_parish.csv")
ggplot(data = gender_parish) +
geom_point(mapping = aes(x = Week, y = Weekly_Case_Count, color = Gender)) +
facet_wrap(~ Location, nrow = 8)+
labs(title = "Weekly case count by Gender and Parish")

Daily case count by Race group of State LA
Race1=read_excel("Race.xlsx")
## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i =
## sheet, : Expecting numeric in O2696 / R2696C15: got 'N/A'
Race1=tibble(Race1)
Race2=filter(Race1, State == 'LA')
Race2=Race2 %>%
separate(Date, into = c("year", "date"), sep = 4)
Race2=Race2 %>%
separate(date, into = c("month", "day"), sep = 2)
Race2=Race2 %>%
unite(Date,year, month, day, sep = "-")
Race2$Date=as.Date.factor(Race2$Date)
Race3=select(Race2,Date,State,Cases_White:Cases_Ethnicity_Unknown)
Race3=Race3 %>%
pivot_longer(c(`Cases_White`,`Cases_Black`,`Cases_LatinX`,`Cases_Asian`,`Cases_AIAN`,`Cases_NHPI`,`Cases_Multiracial`,`Cases_Other`,`Cases_Unknown`,`Cases_Ethnicity_Hispanic`,`Cases_Ethnicity_NonHispanic`,`Cases_Ethnicity_Unknown`), names_to = "Race", values_to = "daily_case")
ggplot(data = Race3) +
geom_point(mapping = aes(x = Date, y = daily_case)) +
facet_wrap(~ Race, nrow = 4) +
labs(title = "Daily case count by Race group of State LA")
## Warning: Removed 398 rows containing missing values (geom_point).

Daily death count by Race group of State LA
Race5=select(Race2,Date,State,Deaths_White:Deaths_Ethnicity_Unknown)
Race5=Race5 %>%
pivot_longer(c(`Deaths_White`:`Deaths_Ethnicity_Unknown`), names_to = "Race", values_to = "daily_death")
ggplot(data = Race5) +
geom_point(mapping = aes(x = Date, y = daily_death)) +
facet_wrap(~ Race, nrow = 4) +
labs(title = "Daily death count by Race group of State LA")
## Warning: Removed 107 rows containing missing values (geom_point).

Q2
Daily_counts=read_excel("daily.xlsx")
combine1=Daily_counts %>%
select(Lab_Collection_Date,Parish,Daily_Case_Count)
combine1=combine1 %>%
pivot_wider(names_from = Parish, values_from = Daily_Case_Count)
Race1=read_excel("Racecopy.xlsx")
## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i =
## sheet, : Expecting numeric in O2696 / R2696C15: got 'N/A'
combine2=Race1 %>%
select(Lab_Collection_Date,Cases_Total)
combine3=left_join(combine1, combine2, by = "Lab_Collection_Date")
combine3=combine3 %>%
pivot_longer(c(`Acadia`:`Winn`), names_to = "Parish", values_to = "Daily_Cases")
print(combine3)
## # A tibble: 182,208 x 4
## Lab_Collection_Date Cases_Total Parish Daily_Cases
## <dttm> <dbl> <chr> <dbl>
## 1 2020-03-01 00:00:00 NA Acadia 0
## 2 2020-03-01 00:00:00 NA Allen 0
## 3 2020-03-01 00:00:00 NA Ascension 0
## 4 2020-03-01 00:00:00 NA Assumption 0
## 5 2020-03-01 00:00:00 NA Avoyelles 0
## 6 2020-03-01 00:00:00 NA Beauregard 0
## 7 2020-03-01 00:00:00 NA Bienville 0
## 8 2020-03-01 00:00:00 NA Bossier 0
## 9 2020-03-01 00:00:00 NA Caddo 0
## 10 2020-03-01 00:00:00 NA Calcasieu 0
## # … with 182,198 more rows
Plot
ggplot(data = combine3) +
geom_point(mapping = aes(x = Lab_Collection_Date, y = Daily_Cases, color = Cases_Total)) +
facet_wrap(~ Parish, nrow = 8)

Findings
#By combining daily cases from two sources—by parish and by race, we created a new table. Fluctuations in daily case counts happened mostly in Calcasieu, East Baton Rouge, Jefferson, Orleans, and Lafayette parishes.
Q3
New Tests
louisiana_history=read_excel("louisiana_history.xlsx")
newtest=select(louisiana_history, date, state, totalTestResultsIncrease)
newtest=newtest %>%
group_by(state) %>%
mutate(newtesttotal_new = rollmean(totalTestResultsIncrease, k=7, align="left", fill=NA)) %>%
ungroup()
newtest=head(newtest,90)
newtest1=newtest %>%
pivot_longer(names_to = "newtesttotal_key", values_to = "newtesttotal_value", newtesttotal_new)
newtest %>%
ggplot(aes(x = date, y = totalTestResultsIncrease))+
geom_col(alpha= 0.4,fill="#9966FF",linetype = 0)+
geom_line(data = newtest1, mapping = aes(x = date, y = newtesttotal_value), color ="#333366", size = 1)+
theme_bw()+
labs(x = "", y = "")+
labs(title = "New tests (Calculated)", subtitle = "Total test result (People)")

New cases
newcases=select(louisiana_history, date, state, positiveIncrease)
newcases=newcases %>%
group_by(state) %>%
mutate(newcases_new = rollmean(positiveIncrease, k = 7, fill = NA, align = "left")) %>%
ungroup()
newcases=head(newcases,90)
newcases1=newcases %>%
pivot_longer(names_to = "newcases_key", values_to = "newcases_value", newcases_new)
newcases %>%
ggplot(aes(x = date, y = positiveIncrease)) +
geom_col(alpha= 0.4,fill="#FF6633",linetype = 0)+
geom_line(data = newcases1, mapping = aes(x = date, y = newcases_value), color ="#FF0000", size = 1)+
theme_bw()+
labs(x = "", y = "")+
labs(title = "New cases (Calculated)")

Current hospitalizations
currenthos=select(louisiana_history, date, state, hospitalizedCurrently)
currenthos=currenthos %>%
group_by(state) %>%
mutate(currenthos_new = rollmean(hospitalizedCurrently, k = 7, fill = NA, align = "left")) %>%
ungroup()
currenthos=head(currenthos,90)
currenthos1=currenthos %>%
pivot_longer(names_to = "currenthos_key", values_to = "currenthos_value", currenthos_new)
currenthos %>%
ggplot(aes(x = date, y = hospitalizedCurrently)) +
geom_col(alpha= 0.4,fill="#3399FF",linetype = 0)+
geom_line(data = currenthos1, mapping = aes(x = date, y = currenthos_value), color ="#0066CC", size = 1)+
theme_bw()+
labs(x = "", y = "")+
labs(title = "Current hospitalizations")

New deaths
newdeaths=select(louisiana_history, date, state, deathIncrease)
newdeaths=newdeaths %>%
group_by(state) %>%
mutate(newdeaths_new = rollmean(deathIncrease, k = 7, fill = NA, align = "left")) %>%
ungroup()
newdeaths=head(newdeaths,90)
newdeaths1=newdeaths %>%
pivot_longer(names_to = "newdeaths_key", values_to = "newdeaths_value", newdeaths_new)
newdeaths %>%
ggplot(aes(x = date, y = deathIncrease)) +
geom_col(alpha= 0.4,fill="#666666",linetype = 0)+
geom_line(data = newdeaths1, mapping = aes(x = date, y = newdeaths_value), color ="#000000", size = 1)+
theme_bw()+
labs(x = "", y = "")+
labs(title = "New deaths")

Q4
Compile a tibble that contains cumulative percent positivity of tests by Parish
testbyweek=read_excel("test by week.xlsx", col_types = c("text", "text", "numeric", "date", "date", "numeric", "numeric", "numeric", "numeric"))
cumulative=testbyweek %>%
count(`Weekly Positive Test Count`) %>%
mutate(cumulative2 = cumsum(n)/sum(n))
cumulative=select(cumulative, -n)
left_join(testbyweek,cumulative,by="Weekly Positive Test Count")
## # A tibble: 33,150 x 10
## Parish Tract `Week (Thur-Wed… `Date for start of… `Date for end of w…
## <chr> <chr> <dbl> <dttm> <dttm>
## 1 Acadia 2200… 1 2020-02-27 00:00:00 2020-03-04 00:00:00
## 2 Acadia 2200… 2 2020-03-05 00:00:00 2020-03-11 00:00:00
## 3 Acadia 2200… 3 2020-03-12 00:00:00 2020-03-18 00:00:00
## 4 Acadia 2200… 4 2020-03-19 00:00:00 2020-03-25 00:00:00
## 5 Acadia 2200… 5 2020-03-26 00:00:00 2020-04-01 00:00:00
## 6 Acadia 2200… 6 2020-04-02 00:00:00 2020-04-08 00:00:00
## 7 Acadia 2200… 7 2020-04-09 00:00:00 2020-04-15 00:00:00
## 8 Acadia 2200… 8 2020-04-16 00:00:00 2020-04-22 00:00:00
## 9 Acadia 2200… 9 2020-04-23 00:00:00 2020-04-29 00:00:00
## 10 Acadia 2200… 10 2020-04-30 00:00:00 2020-05-06 00:00:00
## # … with 33,140 more rows, and 5 more variables: `Weekly Test
## # Count` <dbl>, `Weekly Negative Test Count` <dbl>, `Weekly Positive
## # Test Count` <dbl>, `Weekly Case Count` <dbl>, cumulative2 <dbl>
Plot the cumulative percent positivity of tests of Parish on a map for the weeks containing dates
March 22, 2020 (stay at home order starts)
testbyweek$`Date for start of week`=as.Date(testbyweek$`Date for start of week`)
wk1=filter(testbyweek, `Date for start of week` == '2020-03-19')
cumwk1=wk1 %>%
count(`Weekly Positive Test Count`) %>%
mutate(cumwk11 = cumsum(n)/sum(n))
cumwk1=select(cumwk1, -n)
ggplot(data = cumwk1, mapping = aes(x=`Weekly Positive Test Count`, y=cumwk11))+
geom_line(color = "blue")+
geom_point(color = "black")+
labs(x = "Week 1", y = "Cumulative Percent Positivity")+
labs(title = "March 22, 2020")

May 15, 2020 (stay at home order lifted, phase 1 starts)
testbyweek$`Date for start of week`=as.Date(testbyweek$`Date for start of week`)
wk2=filter(testbyweek, `Date for start of week` == '2020-05-14')
cumwk2=wk2 %>%
count(`Weekly Positive Test Count`) %>%
mutate(cumwk22 = cumsum(n)/sum(n))
cumwk2=select(cumwk2, -n)
ggplot(data = cumwk2, mapping = aes(x=`Weekly Positive Test Count`, y=cumwk22))+
geom_line(color = "red")+
geom_point(color = "black")+
labs(x = "Week 2", y = "Cumulative Percent Positivity")+
labs(title = "May 15, 2020")

June 5, 2020 (phase 2 starts)
testbyweek$`Date for start of week`=as.Date(testbyweek$`Date for start of week`)
wk3=filter(testbyweek, `Date for start of week` == '2020-06-04')
cumwk3=wk3 %>%
count(`Weekly Positive Test Count`) %>%
mutate(cumwk33 = cumsum(n)/sum(n))
cumwk3=select(cumwk3, -n)
ggplot(data = cumwk3, mapping = aes(x=`Weekly Positive Test Count`, y=cumwk33))+
geom_line(color = "yellow")+
geom_point(color = "black")+
labs(x = "Week 3", y = "Cumulative Percent Positivity")+
labs(title = "June 5, 2020")

July 13, 2020 (Mask mandate)
testbyweek$`Date for start of week`=as.Date(testbyweek$`Date for start of week`)
wk4=filter(testbyweek, `Date for start of week` == '2020-07-09')
cumwk4=wk4 %>%
count(`Weekly Positive Test Count`) %>%
mutate(cumwk44 = cumsum(n)/sum(n))
cumwk4=select(cumwk4, -n)
ggplot(data = cumwk4, mapping = aes(x=`Weekly Positive Test Count`, y=cumwk44))+
geom_line(color = "green")+
geom_point(color = "black")+
labs(x = "Week 4", y = "Cumulative Percent Positivity")+
labs(title = "July 13, 2020")

Q5
new cases by lab collection date with a 7-day average line of New Orleans
caseno=filter(Daily_counts, Parish=="Orleans")
caseno=caseno %>%
mutate(sevendayaverage1=rollmean(Daily_Case_Count, k=7, fill=NA))
ggplot(data = caseno, mapping = aes(x=Lab_Collection_Date, y=Daily_Case_Count))+
geom_line(data = caseno, mapping = aes(x = Lab_Collection_Date, y = sevendayaverage1, group = 1), color = "red", size =1)+
geom_col(alpha = 0.5, fill = "blue")+
labs(title = "new cases by lab collection date of New Orleans", x = "Lab Collection Date", y = "New Cases")
## Warning: Removed 6 row(s) containing missing values (geom_path).

new cases by lab collection date with a 7-day average line of Baton Rouge
casebr=filter(Daily_counts, Parish=="East Baton Rouge")
casebr=casebr %>%
mutate(sevendayaverage2=rollmean(Daily_Case_Count, k=7, fill=NA))
ggplot(data = casebr, mapping = aes(x=Lab_Collection_Date, y=Daily_Case_Count))+
geom_line(data = casebr, mapping = aes(x = Lab_Collection_Date, y = sevendayaverage2, group = 1), color = "purple", size =1)+
geom_col(alpha = 0.5, fill = "orange")+
labs(title = "new cases by lab collection date of Baton Rouge", x = "Lab Collection Date", y = "New Cases")
## Warning: Removed 6 row(s) containing missing values (geom_path).

Findings
# In New Orleans, new cases started to increase rapidly since March and peaked in April. A big drop after stay home order was lifted. Even though there were fluctuations in the new cases since May, the numbers are much lower than March's and April's numbers. Baton Rouge has an opposite tendency. In Baton Rouge, new cases started to increase rapidly since July when mask became mandate, but started to slowly decrease since August.